package crawl

import org.jsoup.Jsoup
import org.jsoup.nodes.Document

/**
  * Extracts link targets from the HTML source of a page using jsoup.
  */
object LinkParser {
    /** Raw HTML markup of a single page. */
    type HtmlPage = String

    /**
      * Collects the targets referenced by `page`: the `href` value of every
      * `a` element and the `src` value of every `script` element.
      *
      * Values are returned as jsoup's `attr` reports them. Elements that lack
      * the attribute (e.g. inline scripts) or carry an empty value are skipped,
      * so the result never contains the empty string.
      *
      * @param page HTML markup to scan
      * @return the distinct, non-empty attribute values that were found
      */
    def parse(page: HtmlPage): Set[String] = {
        val doc: Document = Jsoup.parse(page)
        attributeValues(doc, "a", "href") ++ attributeValues(doc, "script", "src")
    }

    /**
      * Gathers the non-empty values of `attr` from every element matching `tag`.
      *
      * @param doc  parsed document to search
      * @param tag  selector passed to `Document.select`
      * @param attr name of the attribute to read from each matched element
      * @return the distinct, non-empty values of `attr`
      */
    private def attributeValues(doc: Document, tag: String, attr: String): Set[String] = {
        val elements = doc.select(tag).iterator()
        Iterator
            .continually(elements)
            .takeWhile(_.hasNext)
            .map(_.next().attr(attr))
            // jsoup returns "" for a missing attribute; that is not a link.
            .filter(_.nonEmpty)
            .toSet
    }

}
